rm(list=ls())
library(plyr)
library(dplyr)
library(stargazer)
library(ggplot2)
library(stringr)
library(ri2)
library(boot)
library(lfe)
library(sjPlot)
library(tidyr)
library(MASS)
library(foreign)
library(Hmisc)
library(sna)
library(gridExtra)
library(grDevices)
library(sandwich)
library(reshape2)
library(parallel)
library(xtable)
library(Cairo)

###set WD:
setwd() #set to your local working directory


##########
# Preparing the data:
##########

###load data with 2015 MORENA PR list candidates:
tombola <- read.csv("MORENA_list.csv", header=T, sep=",")

###data cleaning:
tombola$fecha_nacimiento <- as.Date(tombola$fecha_nacimiento, "%d/%m/%Y")
tombola$fecha_afiliacion <- as.Date(tombola$fecha_afiliacion, "%d/%m/%Y")
tombola$age <- as.numeric(floor((as.Date("7/6/2015","%d/%m/%Y") - tombola$fecha_nacimiento)/365)) #calculating the age on election day in 2015
              
#subset to lottery candidates:
tombola <- subset(tombola, tombola$internal==1)


###initial balance tests:
#Table 2:

balance.tests.diputado <- rbind(
  c("Female",t.test(tombola$genero[tombola$diputado_dummy==0],tombola$genero[tombola$diputado_dummy==1])$estimate,t.test(tombola$genero[tombola$diputado_dummy==0],tombola$genero[tombola$diputado_dummy==1])$estimate[2]-t.test(tombola$genero[tombola$diputado_dummy==0],tombola$genero[tombola$diputado_dummy==1])$estimate[1],t.test(tombola$genero[tombola$diputado_dummy==0],tombola$genero[tombola$diputado_dummy==1])$p.value,length(!is.na(tombola$genero))),
  c("tiempo_de_residencia",t.test(tombola$tiempo_de_residencia[tombola$diputado_dummy==0],tombola$tiempo_de_residencia[tombola$diputado_dummy==1])$estimate,t.test(tombola$tiempo_de_residencia[tombola$diputado_dummy==0],tombola$tiempo_de_residencia[tombola$diputado_dummy==1])$estimate[2]-t.test(tombola$tiempo_de_residencia[tombola$diputado_dummy==0],tombola$tiempo_de_residencia[tombola$diputado_dummy==1])$estimate[1],t.test(tombola$tiempo_de_residencia[tombola$diputado_dummy==0],tombola$tiempo_de_residencia[tombola$diputado_dummy==1])$p.value,length(!is.na(tombola$tiempo_de_residencia))),
  c("Age",t.test(tombola$age[tombola$diputado_dummy==0],tombola$age[tombola$diputado_dummy==1])$estimate,t.test(tombola$age[tombola$diputado_dummy==0],tombola$age[tombola$diputado_dummy==1])$estimate[2]-t.test(tombola$age[tombola$diputado_dummy==0],tombola$age[tombola$diputado_dummy==1])$estimate[1],t.test(tombola$age[tombola$diputado_dummy==0],tombola$age[tombola$diputado_dummy==1])$p.value,length(!is.na(tombola$age))))

colnames(balance.tests.diputado) <- c("Variable","Control","Treatment","Diff. means","p-value","n")
balance.tests.diputado[,2] <- round(as.numeric(balance.tests.diputado[,2]),4)
balance.tests.diputado[,3] <- round(as.numeric(balance.tests.diputado[,3]),4)
balance.tests.diputado[,4] <- round(as.numeric(balance.tests.diputado[,4]),4)
balance.tests.diputado[,5] <- round(as.numeric(balance.tests.diputado[,5]),4)

balance.tests.diputado[,1] <- c("Female","Years of Residence","Age")

balance.tests.diputado

#Table 2:
stargazer(balance.tests.diputado, out="Table2.tex")

#F-Test:
f_test_balance_diputado <- lm(diputado_dummy ~ genero + tiempo_de_residencia + age, data=tombola)
summary(f_test_balance_diputado)


###merge with 2018 electoral data:
load("elections_2018_municipio.Rda")

#some data cleaning:
elections_2018_municipio <- as.data.frame(elections_2018_municipio)
colnames(elections_2018_municipio)[2] <- "municipio_residencia"
tombola$municipio_residencia <- as.character(tombola$municipio_residencia)
elections_2018_municipio$municipio_residencia <- as.character(elections_2018_municipio$municipio_residencia)

for (r in 1:nrow(tombola)){
  tombola$municipio_residencia[r] <- toupper(tombola$municipio_residencia[r])
}

#code state names consistently with other dataset:
elections_2018_municipio$estado_short <- elections_2018_municipio$estado
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="YUCATAN"] <- "YUC"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="COAHUILA"] <- "COAH"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="OAXACA"] <- "OAX"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="CHIAPAS"] <- "CHIS"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="PUEBLA"] <- "PUE"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="GUANAJUATO"] <- "GTO"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="MEXICO"] <- "MEX"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="NAYARIT"] <- "NAY"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="GUERRERO"] <- "GRO"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="JALISCO"] <- "JAL"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="HIDALGO"] <- "HGO"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="VERACRUZ"] <- "VER"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="SONORA"] <- "SON"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="TLAXCALA"] <- "TLAX"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="MICHOACAN"] <- "MICH"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="NUEVO LEON"] <- "NL"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="AGUASCALIENTES"] <- "AGS"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="SINALOA"] <- "SIN"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="SAN LUIS POTOSI"] <- "SLP"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="CHIHUAHUA"] <- "CHIH"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="TAMAULIPAS"] <- "TAMPS"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="CIUDAD DE MEXICO"] <- "CDMX"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="MORELOS"] <- "MOR"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="QUERETARO"] <- "QRO"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="ZACATECAS"] <- "ZAC"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="COLIMA"] <- "COL"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="QUINTANA ROO"] <- "QROO"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="TABASCO"] <- "TAB"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="CAMPECHE"] <- "CAM"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="DURANGO"] <- "DGO"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="BAJA CALIFORNIA SUR"] <- "BCS"
elections_2018_municipio$estado_short[elections_2018_municipio$estado_short=="BAJA CALIFORNIA"] <- "BC"
  
#unique state-municipality id:
tombola$id_mun <- paste(tombola$estado_residencia, tombola$municipio_residencia, sep="_")
elections_2018_municipio$id_mun <- paste(elections_2018_municipio$estado_short, elections_2018_municipio$municipio, sep="_")

#number of candidates per municipality in treatment or control:
mun <- tombola %>% 
  group_by(id_mun) %>% 
  count(internal)
  
#number of candidates per municipality in treatment:
mun_t <- subset(tombola, diputado_dummy==1) %>% 
  group_by(id_mun) %>% 
  count(diputado_dummy)

#number of candidates per municipality in control:
mun_c <- subset(tombola, diputado_dummy==0) %>% 
  group_by(id_mun) %>% 
  count(diputado_dummy)

municipalities <- left_join(elections_2018_municipio,mun)
names(municipalities)[20] <- "tombola"
municipalities <- municipalities[,c(1:18,20)]

municipalities <- left_join(municipalities,mun_t)
names(municipalities)[21] <- "deputy"
municipalities <- municipalities[,c(1:19,21)]

municipalities <- left_join(municipalities,mun_c)
names(municipalities)[22] <- "candidate"
municipalities <- municipalities[,c(1:20,22)]

municipalities_full <- municipalities
municipalities_full$deputy <- ifelse(is.na(municipalities_full$deputy),0,municipalities_full$deputy)
municipalities_full$candidate <- ifelse(is.na(municipalities_full$candidate),0,municipalities_full$candidate)

#coding main treatment variable (elected deputy):
municipalities_full$diputado_dummy <- ifelse(municipalities_full$deputy > 0, 1, 0)

municipalities <- subset(municipalities, !is.na(tombola))
municipalities$deputy <- ifelse(is.na(municipalities$deputy),0,municipalities$deputy)
municipalities$candidate <- ifelse(is.na(municipalities$candidate),0,municipalities$candidate)

municipalities$diputado_dummy <- ifelse(municipalities$deputy > 0, 1, 0)


##add information other other deputies from that municipality:

#municipalities that were (also) represented by other deputy:
mun_mult_elect_1 <- unique(subset(tombola, mult_elect==1)$id_mun)

#municipalities that were not represented by other deputy:
mun_mult_elect_0 <- unique(subset(tombola, mult_elect==0)$id_mun)

municipalities$mult_elect <- ifelse(municipalities$id_mun %in% mun_mult_elect_0, 0, NA) #municipalities that were not represented by other deputy
municipalities$mult_elect <- ifelse(municipalities$id_mun %in% mun_mult_elect_1, 1, municipalities$mult_elect) #municipalities that were (also) represented by other deputy

#states in each multi-state constituency:
circun_1 <- unique(subset(tombola, circunscripcion==1)$estado_residencia)
circun_2 <- unique(subset(tombola, circunscripcion==2)$estado_residencia)
circun_3 <- unique(subset(tombola, circunscripcion==3)$estado_residencia)
circun_4 <- unique(subset(tombola, circunscripcion==4)$estado_residencia)
circun_5 <- unique(subset(tombola, circunscripcion==5)$estado_residencia)


municipalities$circunscripcion <- ifelse(municipalities$estado_short %in% circun_1, 1, NA)
municipalities$circunscripcion <- ifelse(municipalities$estado_short %in% circun_2, 2, municipalities$circunscripcion)
municipalities$circunscripcion <- ifelse(municipalities$estado_short %in% circun_3, 3, municipalities$circunscripcion)
municipalities$circunscripcion <- ifelse(municipalities$estado_short %in% circun_4, 4, municipalities$circunscripcion)
municipalities$circunscripcion <- ifelse(municipalities$estado_short %in% circun_5, 5, municipalities$circunscripcion)

###add election results from 2012 (baseline):
load("elections_2012_municipio.Rda")

#some data cleaning:
elections_2012_municipio <- as.data.frame(elections_2012_municipio)

#create a unique ID for each municipality:
elections_2012_municipio$id <- paste0(elections_2012_municipio$id_estado,elections_2012_municipio$municipio)
municipalities$id <- paste0(municipalities$id_estado,municipalities$municipio)

municipalities_full$id <- paste0(municipalities_full$id_estado,municipalities_full$municipio)

#merge the data sets:
municipalities <- join(municipalities,elections_2012_municipio,by="id",type="left")
municipalities_full <- join(municipalities_full,elections_2012_municipio,by="id",type="left")

#municipalities with valid lottery candidates:
valid_tombola <- subset(municipalities, !is.na(tombola))

#municipalities with valid deputy:
valid_deputy <- subset(municipalities, !is.na(deputy))

#municipalities with valid candidate:
valid_candidate <- subset(municipalities, !is.na(candidate))

#municipalities with valid diputado_dummy:
valid_diputado_dummy <- subset(municipalities, !is.na(diputado_dummy))

#number of candidates per constituency:
const_tombola <- valid_diputado_dummy %>% 
  group_by(circunscripcion) %>% 
  summarise(const_tombola = sum(tombola))

#number of elected deputies per constituency:
const_deputy <- valid_diputado_dummy %>% 
  group_by(circunscripcion) %>% 
  summarise(const_deputy = sum(deputy))

valid_diputado_dummy <- left_join(valid_diputado_dummy,const_tombola)
valid_diputado_dummy <- left_join(valid_diputado_dummy,const_deputy)

##probability of treatment assignment:
#for any given candidate: number of candidates elected from a list / length of list
#for any given municipality: prob of a candidate getting elected * number of candidates

valid_diputado_dummy$prob <- (valid_diputado_dummy$const_deputy/valid_diputado_dummy$const_tombola) * valid_diputado_dummy$tombola
valid_diputado_dummy$ipw <- ifelse(valid_diputado_dummy$diputado_dummy==1, 1 / valid_diputado_dummy$prob, 1 / ( 1 / 1- valid_diputado_dummy$prob))

save(valid_diputado_dummy, file="valid_diputado_dummy.RDa") #save data to use for additional analyses


###Merge with party membership data:

#load membership data:
load("padron.RDa")
padron$fecha_afiliacion <- as.Date(padron$fecha_afiliacion, "%Y-%m-%d")

#recode dates of joining the party:
padron$pre_tombola <- ifelse(padron$fecha_afiliacion < as.Date("2015-06-07","%Y-%m-%d"), 1, 0)
padron$new2012 <- ifelse(padron$fecha_afiliacion > as.Date("2011-12-31","%Y-%m-%d") & padron$fecha_afiliacion < as.Date("2013-01-01","%Y-%m-%d"), 1, 0)
padron$new2013 <- ifelse(padron$fecha_afiliacion > as.Date("2012-12-31","%Y-%m-%d") & padron$fecha_afiliacion < as.Date("2014-01-01","%Y-%m-%d"), 1, 0)
padron$new2014 <- ifelse(padron$fecha_afiliacion > as.Date("2013-12-31","%Y-%m-%d") & padron$fecha_afiliacion < as.Date("2015-01-01","%Y-%m-%d"), 1, 0)
padron$new2015 <- ifelse(padron$fecha_afiliacion > as.Date("2015-06-06","%Y-%m-%d") & padron$fecha_afiliacion < as.Date("2016-01-01","%Y-%m-%d"), 1, 0)
padron$new2016 <- ifelse(padron$fecha_afiliacion > as.Date("2015-12-31","%Y-%m-%d") & padron$fecha_afiliacion < as.Date("2017-01-01","%Y-%m-%d"), 1, 0)
padron$new2017 <- ifelse(padron$fecha_afiliacion > as.Date("2016-12-31","%Y-%m-%d") & padron$fecha_afiliacion < as.Date("2018-01-01","%Y-%m-%d"), 1, 0)
padron$new2018 <- ifelse(padron$fecha_afiliacion > as.Date("2017-12-31","%Y-%m-%d") & padron$fecha_afiliacion < as.Date("2019-01-01","%Y-%m-%d"), 1, 0)

#calculate number of party members by municipality:
padron_mun <- 
padron %>%
  group_by(entidad, mun) %>%
  summarise(pre_tombola = sum(pre_tombola,na.rm=T), new2012 = sum(new2012,na.rm =T), new2013 = sum(new2013,na.rm =T), new2014 = sum(new2014,na.rm =T), new2015 = sum(new2015,na.rm=T), new2016 = sum(new2016,na.rm=T),
            new2017 = sum(new2017,na.rm=T), new2018 = sum(new2018,na.rm=T))

#correcting some encoding errors:
padron_mun$mun[padron_mun$mun=="GUSTAVO A MADERO"] <- "GUSTAVO A. MADERO"

padron_mun$mun_id <- paste0(padron_mun$entidad,"_",padron_mun$mun)

#merge with valid_diputado_dummy:
valid_diputado_dummy$mun_id <- paste0(valid_diputado_dummy$estado,"_",valid_diputado_dummy$municipio_residencia)
valid_diputado_dummy <- left_join(valid_diputado_dummy,padron_mun, by= "mun_id")


###recode a few variables for analysis:
#number total MORENA members by end of 2018:
padron$total <- padron$new2012 + padron$new2013 + padron$new2014 + padron$new2015 + padron$new2016 + padron$new2017 + padron$new2018
sum(padron$total, na.rm = T)

#calculate overall membership rate:
sum(padron$total, na.rm = T)/sum(municipalities_full$lista_nominal_deputies)

#Proportion of registered voters who are affiliated with the party PRIOR to the lottery in percent:
valid_diputado_dummy$pre_tombola_percent <- valid_diputado_dummy$pre_tombola / valid_diputado_dummy$lista_nominal_deputies

#Proportion of registered voters who are affiliated with the party PRIOR to the lottery per 100,000:
valid_diputado_dummy$pre_tombola_per <- valid_diputado_dummy$pre_tombola / valid_diputado_dummy$lista_nominal_deputies * 100000

#Number of voters who affiliated with the party AFTER to the lottery:
valid_diputado_dummy$combined_padron <- valid_diputado_dummy$new2015 + valid_diputado_dummy$new2016 + valid_diputado_dummy$new2017 + valid_diputado_dummy$new2018

#Proportion of registered voters who affiliated with the party AFTER to the lottery in percent:
valid_diputado_dummy$combined_padron_percent <- valid_diputado_dummy$combined_padron / valid_diputado_dummy$lista_nominal_deputies

#Proportion of registered voters who affiliated with the party AFTER to the lottery per 100,000:
valid_diputado_dummy$combined_padron_per <- valid_diputado_dummy$combined_padron / valid_diputado_dummy$lista_nominal_deputies * 100000

#Combined proportion of registered voters who affiliated with the party before or after the lottery in percent:
valid_diputado_dummy$combined_padron_total_percent <- valid_diputado_dummy$combined_padron_percent + valid_diputado_dummy$pre_tombola_percent

#Combined proportion of registered voters who affiliated with the before or after the lottery per 100,000:
valid_diputado_dummy$combined_padron_total_per <- valid_diputado_dummy$combined_padron_per + valid_diputado_dummy$pre_tombola_per


##########
# MAIN ANALYSES:
##########

###Local party membership (in percent):

##Effect on proportion of registered voters who affiliated with the party at baseline (i.e., BEFORE to the lottery) in percent:

#IPW:
pre_tombola_percent_ipw_cl <- felm(pre_tombola_percent ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
pre_tombola_percent_ipw <- felm(pre_tombola_percent ~ diputado_dummy | 0 | 0 | 0, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
pre_tombola_percent_fe <- felm(pre_tombola_percent ~ diputado_dummy | tombola + circunscripcion | 0 | estado + tombola, data=valid_diputado_dummy)


##Effect on proportion of registered voters who affiliated with the party AFTER to the lottery in percent:

#IPW:
combined_padron_percent_ipw_cl <- felm(combined_padron_percent ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
combined_padron_percent_ipw <- felm(combined_padron_percent ~ diputado_dummy | 0 | 0 | 0, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
combined_padron_percent_fe <- felm(combined_padron_percent ~ diputado_dummy | tombola + circunscripcion | 0 | 0, data=valid_diputado_dummy)


##Effect on proportion of registered voters who affiliated with the party AFTER to the lottery in percent, 
##controlling for the number of party members before the lottery:

#IPW:
combined_padron_percent_control_ipw_cl <- felm(combined_padron_percent ~ diputado_dummy + pre_tombola_percent | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
combined_padron_percent_control_ipw <- felm(combined_padron_percent ~ diputado_dummy  + pre_tombola_percent | 0 | 0 | 0, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
combined_padron_percent_control_fe <- felm(combined_padron_percent ~ diputado_dummy + pre_tombola_percent | tombola + circunscripcion | 0 | 0, data=valid_diputado_dummy)


##Effect on total proportion of registered voters who affiliated by the end of 2018 in percent:

#IPW:
combined_padron_total_percent_ipw_cl <- felm(combined_padron_total_percent ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
combined_padron_total_percent_ipw <- felm(combined_padron_total_percent ~ diputado_dummy | 0 | 0 | 0, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
combined_padron_total_percent_fe <- felm(combined_padron_total_percent ~ diputado_dummy  | tombola + circunscripcion | 0 | 0, data=valid_diputado_dummy)


##Effect on total proportion of registered voters who affiliated by the end of 2018 in percent:
##controlling for the number of party members before the lottery:

#IPW:
combined_padron_total_control_percent_ipw_cl <- felm(combined_padron_total_percent ~ diputado_dummy + pre_tombola_percent | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
combined_padron_total_control_percent_ipw <- felm(combined_padron_total_percent ~ diputado_dummy + pre_tombola_percent | 0 | 0 | 0, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
combined_padron_total_control_percent_fe <- felm(combined_padron_total_percent ~ diputado_dummy + pre_tombola_percent | tombola + circunscripcion | 0 | 0, data=valid_diputado_dummy)


###Local party membership (rescaled -- per 100,000 registered votes):

##Effect on proportion of registered voters who are affiliated with the party PRIOR to the lottery per 100,000:

#IPW:
pre_tombola_per_ipw_cl <- felm(pre_tombola_per ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
pre_tombola_per_ipw <- felm(pre_tombola_per ~ diputado_dummy | 0 | 0 | 0, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
pre_tombola_per_fe <- felm(pre_tombola_per ~ diputado_dummy | tombola + circunscripcion | 0 | estado + tombola, data=valid_diputado_dummy)


##Effect on proportion of registered voters who affiliated with the party AFTER to the lottery per 100,000:

#IPW:
combined_padron_per_ipw_cl <- felm(combined_padron_per ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
combined_padron_per_ipw <- felm(combined_padron_per ~ diputado_dummy | 0 | 0 | 0, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
combined_padron_per_fe <- felm(combined_padron_per ~ diputado_dummy | tombola + circunscripcion | 0 | 0, data=valid_diputado_dummy)


##Effect on proportion of registered voters who affiliated with the party AFTER to the lottery per 100,000:
##controlling for the number of party members before the lottery:

#IPW:
combined_padron_per_control_ipw_cl <- felm(combined_padron_per ~ diputado_dummy + pre_tombola_per | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
combined_padron_per_control_ipw <- felm(combined_padron_per ~ diputado_dummy  + pre_tombola_per | 0 | 0 | 0, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
combined_padron_per_control_fe <- felm(combined_padron_per ~ diputado_dummy + pre_tombola_per | tombola + circunscripcion | 0 | 0, data=valid_diputado_dummy)


##Effect on total proportion of registered voters who affiliated by the end of 2018 per 100,000:

#IPW:
combined_padron_total_per_ipw_cl <- felm(combined_padron_total_per ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
combined_padron_total_per_ipw <- felm(combined_padron_total_per ~ diputado_dummy | 0 | 0 | 0, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
combined_padron_total_per_fe <- felm(combined_padron_total_per ~ diputado_dummy  | tombola + circunscripcion | 0 | 0, data=valid_diputado_dummy)


###Combined results:

#function to adjust p values for one tailed tests:
one_tailed <- function(x){
  p_one_tailed <- x / 2
  return(p_one_tailed)
}

#calculate percent change relative to control:
percent_change_combined <- paste0("+",round(combined_padron_percent_ipw_cl$coefficients[2]/combined_padron_percent_ipw_cl$coefficients[1]*100,0),"%")
percent_change_total <- paste0("+",round(combined_padron_total_percent_ipw_cl$coefficients[2]/combined_padron_total_percent_ipw_cl$coefficients[1]*100,0),"%")


##Table 4:
stargazer(combined_padron_per_ipw_cl, combined_padron_total_per_ipw_cl, digits=2, 
          dep.var.caption = "Outcome:",
          dep.var.labels = c("New Membership","Total Membership"),
          covariate.labels = c("Treatment Effect","Constant (Control Mean)"),
          type="latex", 
          apply.p = one_tailed,
          add.lines = list(c("Percent Change (Rel. to Control)", percent_change_combined, percent_change_total)),
          keep.stat=c("n"),
          notes.append = T,
          no.space=TRUE,
          out="Table4.tex")

##Table D1:
stargazer(combined_padron_per_ipw, combined_padron_total_per_ipw, digits=2, 
          dep.var.caption = "Outcome:",
          dep.var.labels = c("New Membership","Total Membership"),
          covariate.labels = c("Treatment Effect","Constant (Control Mean)"),
          type="latex", 
          apply.p = one_tailed,
          add.lines = list(c("Percent Change (Rel. to Control)", percent_change_combined, percent_change_total)),
          keep.stat=c("n"),
          notes.append = T,
          no.space=TRUE, 
          out="TableD1.tex")


##Table 5:
stargazer(pre_tombola_per_ipw_cl, combined_padron_per_control_ipw_cl, digits=2, 
          dep.var.caption = "Outcome:",
          dep.var.labels = c("Baseline Membership","New Membership"),
          covariate.labels = c("Treatment Effect","Baseline Membership","Constant (Control Mean)"),
          type="latex", 
          apply.p = one_tailed,
          keep.stat=c("n"),
          notes.append = T,
          no.space=TRUE, 
          out="Table5.tex")

##Table D2:
stargazer(pre_tombola_per_ipw, combined_padron_per_control_ipw, digits=2, 
          dep.var.caption = "Outcome:",
          dep.var.labels = c("Baseline Membership","New Membership"),
          covariate.labels = c("Treatment Effect","Baseline Membership","Constant (Control Mean)"),
          type="latex", 
          apply.p = one_tailed,
          keep.stat=c("n"),
          notes.append = T,
          no.space=TRUE, 
          out="TableD2.tex")


###Downstream effects on vote choice:

##vote share MORENA:

#IPW:
share_morena_ipw_cl <- felm(share_morena_deputies ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
share_morena_fe <- felm(share_morena_deputies ~ diputado_dummy | tombola + circunscripcion | 0 | estado + tombola, data=valid_diputado_dummy)

##vote share PRD:

#IPW:
share_prd_ipw_cl <- felm(share_pan_prd_deputies ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
share_prd_fe <- felm(share_pan_prd_deputies ~ diputado_dummy | tombola + circunscripcion | 0 | estado + tombola, data=valid_diputado_dummy)


##vote share PRI:

#IPW:
share_pri_ipw_cl <- felm(share_pri_deputies ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
share_pri_fe <- felm(share_pri_deputies ~ diputado_dummy | tombola + circunscripcion | 0 | estado, data=valid_diputado_dummy)


##effect on margin-of-victory for MORENA at municipal level (over strongest other party):

for(l in 1:nrow(valid_diputado_dummy)){
  valid_diputado_dummy$MoV_MORENA_deputies[l] <- valid_diputado_dummy$share_morena_deputies[l] - max(c(valid_diputado_dummy$share_pan_prd_deputies[l],valid_diputado_dummy$share_pri_deputies[l]))
}

#IPW:
MoV_morena_ipw_cl <- felm(MoV_MORENA_deputies ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
MoV_morena_fe <- felm(MoV_MORENA_deputies ~ diputado_dummy | tombola + circunscripcion | 0 | estado + tombola, data=valid_diputado_dummy)


##Effect on margin-of-victory for MORENA over PRD/PAN at municipal level:

for(l in 1:nrow(valid_diputado_dummy)){
  valid_diputado_dummy$MoV_MORENA_PRD_deputies[l] <- valid_diputado_dummy$share_morena_deputies[l] - valid_diputado_dummy$share_pan_prd_deputies[l]
}

#IPW:
MoV_morena_prd_ipw_cl <- felm(MoV_MORENA_PRD_deputies ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

#FE Model:
MoV_morena_prd_fe <- felm(MoV_MORENA_PRD_deputies ~ diputado_dummy | tombola + circunscripcion | 0 | estado + tombola, data=valid_diputado_dummy)


###Combine all results:

##main results (with clustered SEs):
findings_cl <- as.data.frame(rbind(rep(NA,10),rep(NA,10),rep(NA,10),rep(NA,10),rep(NA,10)))

findings_cl[1,] <- c("share_morena",summary(share_morena_ipw_cl)$coefficients[2,c(1,2,4)],summary(share_morena_ipw_cl)$coefficients[1],summary(share_morena_ipw_cl)$N,summary(share_morena_fe)$coefficients[c(1,2,4)],summary(share_morena_fe)$N)
findings_cl[2,] <- c("share_prd",summary(share_prd_ipw_cl)$coefficients[2,c(1,2,4)],summary(share_prd_ipw_cl)$coefficients[1],summary(share_prd_ipw_cl)$N,summary(share_prd_fe)$coefficients[c(1,2,4)],summary(share_prd_fe)$N)
findings_cl[3,] <- c("share_pri",summary(share_pri_ipw_cl)$coefficients[2,c(1,2,4)],summary(share_pri_ipw_cl)$coefficients[1],summary(share_pri_ipw_cl)$N,summary(share_pri_fe)$coefficients[c(1,2,4)],summary(share_pri_fe)$N)
findings_cl[4,] <- c("MoV_morena",summary(MoV_morena_ipw_cl)$coefficients[2,c(1,2,4)],summary(MoV_morena_ipw_cl)$coefficients[1],summary(MoV_morena_ipw_cl)$N,summary(MoV_morena_fe)$coefficients[c(1,2,4)],summary(MoV_morena_fe)$N)
findings_cl[5,] <- c("MoV_morena_prd",summary(MoV_morena_prd_ipw_cl)$coefficients[2,c(1,2,4)],summary(MoV_morena_prd_ipw_cl)$coefficients[1],summary(MoV_morena_prd_ipw_cl)$N,summary(MoV_morena_prd_fe)$coefficients[c(1,2,4)],summary(MoV_morena_prd_fe)$N)
findings_cl[6,] <- c("padron_increase",summary(combined_padron_per_ipw_cl)$coefficients[2,c(1,2,4)],summary(combined_padron_per_ipw_cl)$coefficients[1],summary(combined_padron_per_ipw_cl)$N,summary(combined_padron_per_fe)$coefficients[c(1,2,4)],summary(combined_padron_per_fe)$N)
findings_cl[7,] <- c("padron_total",summary(combined_padron_total_per_ipw_cl)$coefficients[2,c(1,2,4)],summary(combined_padron_total_per_ipw_cl)$coefficients[1],summary(combined_padron_total_per_ipw_cl)$N,summary(combined_padron_total_per_fe)$coefficients[c(1,2,4)],summary(combined_padron_total_per_fe)$N)
findings_cl[8,] <- c("padron_increase_control",summary(combined_padron_per_control_ipw_cl)$coefficients[2,c(1,2,4)],summary(combined_padron_per_control_ipw_cl)$coefficients[1],summary(combined_padron_per_control_ipw_cl)$N,summary(combined_padron_per_control_fe)$coefficients[c(1,2,4)],summary(combined_padron_per_control_fe)$N)


colnames(findings_cl) <- c("Outcome","ATE (IPW)", "SE (IPW)","p-value (IPW)","Intercept (IPW)","n (IPW)","ATE (FE)", "SE (FE)","p-value (FE)","n (FE)")
findings_cl[,2] <- as.numeric(findings_cl[,2])
findings_cl[,3] <- as.numeric(findings_cl[,3])
findings_cl[,4] <- as.numeric(findings_cl[,4])
findings_cl[,5] <- as.numeric(findings_cl[,5])
findings_cl[,6] <- as.numeric(findings_cl[,6])
findings_cl[,7] <- as.numeric(findings_cl[,7])
findings_cl[,8] <- as.numeric(findings_cl[,8])
findings_cl[,9] <- as.numeric(findings_cl[,9])
findings_cl[,10] <- as.numeric(findings_cl[,10])

findings_cl


##share_morena:
results_share_morena <- as.data.frame(rbind("Control","Treatment"))
results_share_morena$Estimates <- rbind(findings_cl[1,]$`Intercept (IPW)`*100, (findings_cl[1,]$`Intercept (IPW)` + findings_cl[1,]$`ATE (IPW)`)*100)
colnames(results_share_morena)[1] <- "Group"
results_share_morena$Estimates <- round(results_share_morena$Estimates,2)
results_share_morena


##Figure 5:
pd <- position_dodge(0.1)
pdf("fg5.pdf", width = 5, height = 5)
share_morena_figure <- ggplot(results_share_morena, aes(x=factor(Group), y=Estimates, fill=factor(Group))) +
  geom_bar(position=pd, stat="identity", width=0.45, alpha=0.9) +
  geom_text(aes(label = Estimates), size = 4.0, vjust=-0.5, hjust = 0.75, nudge_x = 0.05) +
  ggtitle(" ") +
  xlab("") +
  ylab("Vote Share (%)") +
  ylim(0, 100) +
  theme_minimal() +
  theme(axis.text.y=element_text(colour = "gray30", size=11), axis.ticks.y=element_blank()) +
  theme(axis.text.x=element_text(colour = "gray30", size=11), axis.ticks.x=element_line(colour="gray30"))
share_morena_figure<- share_morena_figure + geom_segment(aes(x=1, y=70, xend=1.2, yend=73), colour="darkgray") +
  geom_segment(aes(x=1.2, y=73, xend=1.8, yend=73), colour="darkgray") +
  geom_segment(aes(x=1.8, y=73, xend=2, yend=70), colour="darkgray")
share_morena_figure <- share_morena_figure + annotate("text", x=1.5, y=76, label="+5.10%p (p=0.0302)", size=4)

share_morena_figure <- share_morena_figure + scale_fill_manual(values=c("cornflowerblue", "darkred"))
share_morena_figure <- share_morena_figure + theme(legend.position="none")
print(share_morena_figure)
dev.off()


##Table with vote choice findings:
results_table <- findings_cl[c(2:5),]
results_table$Control <- results_table$`Intercept (IPW)`
results_table$Treatment <- results_table$`Intercept (IPW)` + results_table$`ATE (IPW)`

results_table <- results_table[,c(1,11:12,2:4,6)]
results_table$Control <- round(results_table$Control,4)
results_table$Treatment <- round(results_table$Treatment,4)
results_table$`ATE (IPW)` <- round(results_table$`ATE (IPW)`,4)
results_table$`SE (IPW)` <- round(results_table$`SE (IPW)`,4)
results_table$`p-value (IPW)` <- round(results_table$`p-value (IPW)`/2,4)

results_table$Outcome <- c("Vote Share for PRD","Vote Share for PRI","MoV: MORENA over Runner-up", "MoV: MORENA over PRD/PAN")
names(results_table)[c(4:7)] <- c("ATE","SE","p-value","n")

results_table

#Table 6:
stargazer(as.matrix(results_table), rownames = F, 
          out="Table6.tex")


##Table with replication of findings with FEs:
results_table_fe <- findings_cl[c(1:5),]

results_table_fe <- results_table_fe[,c(1,7:10)]
results_table_fe$`ATE (FE)` <- round(results_table_fe$`ATE (FE)`,4)
results_table_fe$`SE (FE)` <- round(results_table_fe$`SE (FE)`,4)
results_table_fe$`p-value (FE)` <- round(results_table_fe$`p-value (FE)`/2,4)

results_table_fe$Outcome <- c("Vote Share for MORENA","Vote Share for PRD","Vote Share for PRI","MoV: MORENA over Runner-up", "MoV: MORENA over PRD/PAN")
names(results_table_fe)[c(2:5)] <- c("ATE","SE","p-value","n")

results_table_fe

#Table E2:
stargazer(as.matrix(results_table_fe), rownames = F, 
          out="TableE2.tex")


##Table with replication of main findings with FEs:
padron_table_fe <- findings_cl[c(6:7),]

padron_table_fe <- padron_table_fe[,c(1,7:10)]
padron_table_fe$`ATE (FE)` <- round(padron_table_fe$`ATE (FE)`,4)
padron_table_fe$`SE (FE)` <- round(padron_table_fe$`SE (FE)`,4)
padron_table_fe$`p-value (FE)` <- round(padron_table_fe$`p-value (FE)`/2,4)

padron_table_fe$Outcome <- c("Total MORENA Members","New MORENA Members")
names(padron_table_fe)[c(2:5)] <- c("ATE","SE","p-value","n")

padron_table_fe

#Table E1:
stargazer(as.matrix(padron_table_fe), rownames = F, 
          out="TableE1.tex")


###Effect of access to office on local party institutionalization by whether municipality was (also) represented by other deputy:

##Effect on proportion of registered voters who affiliated with the party AFTER to the lottery per 100,000:
combined_padron_per_mult_elect_0_ipw_cl <- felm(combined_padron_per ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=subset(valid_diputado_dummy,mult_elect==0), weights=subset(valid_diputado_dummy,mult_elect==0)$ipw)
combined_padron_per_mult_elect_1_ipw_cl <- felm(combined_padron_per ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=subset(valid_diputado_dummy,mult_elect==1), weights=subset(valid_diputado_dummy,mult_elect==1)$ipw)
combined_padron_per_mult_elect_interact_ipw_cl <- felm(combined_padron_per ~ diputado_dummy * mult_elect  | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)

##Effect on total proportion of registered voters who affiliated by the end of 2018 per 100,000:
combined_padron_total_per_mult_elect_0_ipw_cl <- felm(combined_padron_total_per ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=subset(valid_diputado_dummy,mult_elect==0), weights=subset(valid_diputado_dummy,mult_elect==0)$ipw)
combined_padron_total_per_mult_elect_1_ipw_cl <- felm(combined_padron_total_per ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=subset(valid_diputado_dummy,mult_elect==1), weights=subset(valid_diputado_dummy,mult_elect==1)$ipw)
combined_padron_total_per_mult_elect_interact_ipw_cl <- felm(combined_padron_total_per ~ diputado_dummy * mult_elect  | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)


##combining results:
findings_mult_elect_cl <- as.data.frame(rbind(rep(NA,6),rep(NA,6),rep(NA,6),rep(NA,6),rep(NA,6),rep(NA,6)))

findings_mult_elect_cl[1,] <- c("combined_padron_per_mult_elect_0",summary(combined_padron_per_mult_elect_0_ipw_cl)$coefficients[2,c(1,2,4)],summary(combined_padron_per_mult_elect_0_ipw_cl)$coefficients[1],summary(combined_padron_per_mult_elect_0_ipw_cl)$N)
findings_mult_elect_cl[2,] <- c("combined_padron_per_mult_elect_1",summary(combined_padron_per_mult_elect_1_ipw_cl)$coefficients[2,c(1,2,4)],summary(combined_padron_per_mult_elect_1_ipw_cl)$coefficients[1],summary(combined_padron_per_mult_elect_1_ipw_cl)$N)
findings_mult_elect_cl[3,] <- c("combined_padron_per_mult_elect_interact",summary(combined_padron_per_mult_elect_interact_ipw_cl)$coefficients[4,c(1,2,4)],summary(combined_padron_per_mult_elect_interact_ipw_cl)$coefficients[1],summary(combined_padron_per_mult_elect_interact_ipw_cl)$N)

findings_mult_elect_cl[4,] <- c("combined_padron_total_per_mult_elect_0",summary(combined_padron_total_per_mult_elect_0_ipw_cl)$coefficients[2,c(1,2,4)],summary(combined_padron_total_per_mult_elect_0_ipw_cl)$coefficients[1],summary(combined_padron_total_per_mult_elect_0_ipw_cl)$N)
findings_mult_elect_cl[5,] <- c("combined_padron_total_per_mult_elect_1",summary(combined_padron_total_per_mult_elect_1_ipw_cl)$coefficients[2,c(1,2,4)],summary(combined_padron_total_per_mult_elect_1_ipw_cl)$coefficients[1],summary(combined_padron_total_per_mult_elect_1_ipw_cl)$N)
findings_mult_elect_cl[6,] <- c("combined_padron_total_per_mult_elect_interact",summary(combined_padron_total_per_mult_elect_interact_ipw_cl)$coefficients[4,c(1,2,4)],summary(combined_padron_total_per_mult_elect_interact_ipw_cl)$coefficients[1],summary(combined_padron_total_per_mult_elect_interact_ipw_cl)$N)

findings_mult_elect_cl$other_mp <- c("No","Yes","Difference","No","Yes","Difference")

findings_mult_elect_cl <- findings_mult_elect_cl[,c(1,7,2:4,6)]

colnames(findings_mult_elect_cl) <- c("Outcome","Other MP","ATE", "SE","p-value","n")
findings_mult_elect_cl[,3] <- as.numeric(findings_mult_elect_cl[,3])
findings_mult_elect_cl[,4] <- as.numeric(findings_mult_elect_cl[,4])
findings_mult_elect_cl[,5] <- as.numeric(findings_mult_elect_cl[,5])
findings_mult_elect_cl[,6] <- as.numeric(findings_mult_elect_cl[,6])
findings_mult_elect_cl$`p-value` <- findings_mult_elect_cl$`p-value` #one-tailed tests

findings_mult_elect_cl$ATE <- round(findings_mult_elect_cl$ATE,4)
findings_mult_elect_cl$SE <- round(findings_mult_elect_cl$SE,4)
findings_mult_elect_cl$`p-value` <- round(findings_mult_elect_cl$`p-value`,4)

findings_mult_elect_cl$Outcome <- c("New Members","New Members","New Members","Total Members","Total Members","Total Members")

findings_mult_elect_cl

#Table G1:
stargazer(as.matrix(findings_mult_elect_cl), rownames = F, 
          out="TableG1.tex")



###Balance statistics for other municipality characteristics:
### drawing on census & CONEVAL data:

##import data with information on municipality characteristics:
mun_charact <- read.csv("mun_charact.csv", header=T, row.names=1, sep=",") #import data with information on municipality characteristics

##some data cleaning:
mun_charact$uid_join <- paste0(mun_charact$estado,"_",mun_charact$municipio) #create join variable to merge with other data

valid_diputado_dummy$uid_join <- paste0(valid_diputado_dummy$estado,"_",valid_diputado_dummy$municipio_residencia) #create join variable to merge with other data
valid_diputado_dummy <- left_join(valid_diputado_dummy,mun_charact, by="uid_join",copy=F)

names(valid_diputado_dummy)[names(valid_diputado_dummy)=="X.sector.primario"] <- "sector.primario"
names(valid_diputado_dummy)[names(valid_diputado_dummy)=="X.sector.comercio"] <- "sector.comercio"
names(valid_diputado_dummy)[names(valid_diputado_dummy)=="X.sector.servicios"] <- "sector.servicios"
names(valid_diputado_dummy)[names(valid_diputado_dummy)=="X.Trabajadores.de.la.Industria"] <- "Trabajadores.de.la.Industria"

##Industrial workers:
Trabajadores.de.la.Industria_ipw <- felm(Trabajadores.de.la.Industria ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
Trabajadores.de.la.Industria_fe <- felm(Trabajadores.de.la.Industria ~ diputado_dummy | tombola + circunscripcion | 0 | estado.x + tombola, data=valid_diputado_dummy)

##Industrial workers:
sector.primario_ipw <- felm(sector.primario ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
sector.primario_fe <- felm(sector.primario ~ diputado_dummy | tombola + circunscripcion | 0 | estado.x + tombola, data=valid_diputado_dummy)

##Commercial sector workers:
sector.comercio_ipw <- felm(sector.comercio ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
sector.comercio_fe <- felm(sector.comercio ~ diputado_dummy | tombola + circunscripcion | 0 | estado.x + tombola, data=valid_diputado_dummy)

##Service sector workers:
sector.servicios_ipw <- felm(sector.servicios ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
sector.servicios_fe <- felm(sector.servicios ~ diputado_dummy | tombola + circunscripcion | 0 | estado.x + tombola, data=valid_diputado_dummy)

##Different age segments of the population:

#creating variables for proportion of population ages 0-29, 30-49, and 50+:
valid_diputado_dummy$poblacion.00.29.anos <- valid_diputado_dummy$poblacion.00.04.anos + valid_diputado_dummy$poblacion.05.09.anos + valid_diputado_dummy$poblacion.10.14.anos + valid_diputado_dummy$poblacion.15.19.anos + valid_diputado_dummy$poblacion.20.24.anos + valid_diputado_dummy$poblacion.25.29.anos
valid_diputado_dummy$poblacion.30.49.anos <- valid_diputado_dummy$poblacion.30.34.anos + valid_diputado_dummy$poblacion.35.39.anos + valid_diputado_dummy$poblacion.40.44.anos + valid_diputado_dummy$poblacion.45.49.anos
valid_diputado_dummy$poblacion.50.anos.y.mas <- valid_diputado_dummy$poblacion.50.54.anos + valid_diputado_dummy$poblacion.55.59.anos + valid_diputado_dummy$poblacion.60.64.anos + valid_diputado_dummy$poblacion.65.69.anos + valid_diputado_dummy$poblacion.70.74.anos + valid_diputado_dummy$poblacion.75.anos.y.mas
valid_diputado_dummy$total_population <- valid_diputado_dummy$poblacion.00.29.anos + valid_diputado_dummy$poblacion.30.49.anos + valid_diputado_dummy$poblacion.50.anos.y.mas
valid_diputado_dummy$poblacion.00.29.anos <- valid_diputado_dummy$poblacion.00.29.anos / valid_diputado_dummy$total_population
valid_diputado_dummy$poblacion.30.49.anos <- valid_diputado_dummy$poblacion.30.49.anos / valid_diputado_dummy$total_population
valid_diputado_dummy$poblacion.50.anos.y.mas <- valid_diputado_dummy$poblacion.50.anos.y.mas / valid_diputado_dummy$total_population

#analysis:
poblacion.00.29.anos_ipw <- felm(poblacion.00.29.anos ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
poblacion.00.29.anos_fe <- felm(poblacion.00.29.anos ~ diputado_dummy | tombola + circunscripcion | 0 | estado.x + tombola, data=valid_diputado_dummy)

poblacion.30.49.anos_ipw <- felm(poblacion.30.49.anos ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
poblacion.30.49.anos_fe <- felm(poblacion.30.49.anos ~ diputado_dummy | tombola + circunscripcion | 0 | estado.x + tombola, data=valid_diputado_dummy)

poblacion.50.anos.y.mas_ipw <- felm(poblacion.50.anos.y.mas ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
poblacion.50.anos.y.mas_fe <- felm(poblacion.50.anos.y.mas ~ diputado_dummy | tombola + circunscripcion | 0 | estado.x + tombola, data=valid_diputado_dummy)

##Income vulnerable population (percent):
Porcentaje.de.poblacion.vulnerable.por.ingresos_ipw <- felm(Porcentaje.de.poblacion.vulnerable.por.ingresos ~ diputado_dummy | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights=valid_diputado_dummy$ipw)
Porcentaje.de.poblacion.vulnerable.por.ingresos_fe <- felm(Porcentaje.de.poblacion.vulnerable.por.ingresos ~ diputado_dummy | tombola + circunscripcion | 0 | estado.x + tombola, data=valid_diputado_dummy)


##combine results:
balance <- as.data.frame(rbind(rep(NA,10),rep(NA,10),rep(NA,10),rep(NA,10),rep(NA,10),rep(NA,10),rep(NA,10),rep(NA,10)))

balance[1,] <- c("poblacion.00.29.anos",summary(poblacion.00.29.anos_ipw)$coefficients[2,c(1,2,4)],summary(poblacion.00.29.anos_ipw)$coefficients[1],summary(poblacion.00.29.anos_ipw)$N,summary(poblacion.00.29.anos_fe)$coefficients[c(1,2,4)],summary(poblacion.00.29.anos_fe)$N)
balance[2,] <- c("poblacion.30.49.anos",summary(poblacion.30.49.anos_ipw)$coefficients[2,c(1,2,4)],summary(poblacion.30.49.anos_ipw)$coefficients[1],summary(poblacion.30.49.anos_ipw)$N,summary(poblacion.30.49.anos_fe)$coefficients[c(1,2,4)],summary(poblacion.30.49.anos_fe)$N)
balance[3,] <- c("poblacion.50.anos.y.mas",summary(poblacion.50.anos.y.mas_ipw)$coefficients[2,c(1,2,4)],summary(poblacion.50.anos.y.mas_ipw)$coefficients[1],summary(poblacion.50.anos.y.mas_ipw)$N,summary(poblacion.50.anos.y.mas_fe)$coefficients[c(1,2,4)],summary(poblacion.50.anos.y.mas_fe)$N)
balance[4,] <- c("sector.primario",summary(sector.primario_ipw)$coefficients[2,c(1,2,4)],summary(sector.primario_ipw)$coefficients[1],summary(sector.primario_ipw)$N,summary(sector.primario_fe)$coefficients[c(1,2,4)],summary(sector.primario_fe)$N)
balance[5,] <- c("Trabajadores.de.la.Industria",summary(Trabajadores.de.la.Industria_ipw)$coefficients[2,c(1,2,4)],summary(Trabajadores.de.la.Industria_ipw)$coefficients[1],summary(Trabajadores.de.la.Industria_ipw)$N,summary(Trabajadores.de.la.Industria_fe)$coefficients[c(1,2,4)],summary(Trabajadores.de.la.Industria_fe)$N)
balance[6,] <- c("sector.comercio",summary(sector.comercio_ipw)$coefficients[2,c(1,2,4)],summary(sector.comercio_ipw)$coefficients[1],summary(sector.comercio_ipw)$N,summary(sector.comercio_fe)$coefficients[c(1,2,4)],summary(sector.comercio_fe)$N)
balance[7,] <- c("sector.servicios",summary(sector.servicios_ipw)$coefficients[2,c(1,2,4)],summary(sector.servicios_ipw)$coefficients[1],summary(sector.servicios_ipw)$N,summary(sector.servicios_fe)$coefficients[c(1,2,4)],summary(sector.servicios_fe)$N)
balance[8,] <- c("Porcentaje.de.poblacion.vulnerable.por.ingresos",summary(Porcentaje.de.poblacion.vulnerable.por.ingresos_ipw)$coefficients[2,c(1,2,4)],summary(Porcentaje.de.poblacion.vulnerable.por.ingresos_ipw)$coefficients[1],summary(Porcentaje.de.poblacion.vulnerable.por.ingresos_ipw)$N,summary(Porcentaje.de.poblacion.vulnerable.por.ingresos_fe)$coefficients[c(1,2,4)],summary(Porcentaje.de.poblacion.vulnerable.por.ingresos_fe)$N)
balance[9,] <- c("MORENA Members (Baseline)",summary(pre_tombola_percent_ipw_cl)$coefficients[2,c(1,2,4)],summary(pre_tombola_percent_ipw_cl)$coefficients[1],summary(pre_tombola_percent_ipw_cl)$N,summary(pre_tombola_percent_fe)$coefficients[c(1,2,4)],summary(pre_tombola_percent_fe)$N)


colnames(balance) <- c("Outcome","ATE (IPW)", "SE (IPW)","p-value (IPW)","Intercept (IPW)","n (IPW)","ATE (FE)", "SE (FE)","p-value (FE)","n (FE)")
balance[,2] <- as.numeric(balance[,2])
balance[,3] <- as.numeric(balance[,3])
balance[,4] <- as.numeric(balance[,4])
balance[,5] <- as.numeric(balance[,5])
balance[,6] <- as.numeric(balance[,6])
balance[,7] <- as.numeric(balance[,7])
balance[,8] <- as.numeric(balance[,8])
balance[,9] <- as.numeric(balance[,9])
balance[,10] <- as.numeric(balance[,10])

balance

##table with balance tests:
balance_ipw <- balance
balance_ipw$Control <- balance_ipw$`Intercept (IPW)`
balance_ipw$Treatment <- balance_ipw$`Intercept (IPW)` + balance_ipw$`ATE (IPW)`

balance_ipw <- balance_ipw[,c(1,11:12,2:4,6)]
balance_ipw[c(4:8),c(2:5)] <- balance_ipw[c(4:8),c(2:5)]/100
balance_ipw$Control <- round(balance_ipw$Control,4)
balance_ipw$Treatment <- round(balance_ipw$Treatment,4)
balance_ipw$`ATE (IPW)` <- round(balance_ipw$`ATE (IPW)`,4)
balance_ipw$`SE (IPW)` <- round(balance_ipw$`SE (IPW)`,4)
balance_ipw$`p-value (IPW)` <- round(balance_ipw$`p-value (IPW)`,4)

balance_ipw$Outcome <- c("Population Ages 0-29","Population Ages 30-49","Population Ages 50+","Primary Sector Workers",
                         "Industrial Workers","Commercial Sector Workers","Service Sector Workers","Income Vulnerable Population",
                         "MORENA Members (Baseline)")

names(balance_ipw)[c(4:7)] <- c("ATE","SE","p-value","n")

balance_ipw

#Table 3:
stargazer(as.matrix(balance_ipw), rownames = F, 
          out="Table3.tex")

#F-Test:
f_test_balance <- felm(diputado_dummy ~ poblacion.30.49.anos + poblacion.50.anos.y.mas + sector.primario + sector.comercio + sector.servicios + Porcentaje.de.poblacion.vulnerable.por.ingresos + pre_tombola_percent | 0 | 0 | circunscripcion + tombola, data=valid_diputado_dummy, weights = valid_diputado_dummy$ipw)
#omitted age category: poblacion.00.29.anos
summary(f_test_balance)


##table with balance tests with FE:
balance_fe <- balance
balance_fe <- balance_fe[,c(1,7:10)]
balance_fe[c(4:8),c(2:3)] <- balance_fe[c(4:8),c(2:3)]/100

balance_fe$`ATE (FE)` <- round(balance_fe$`ATE (FE)`,4)
balance_fe$`SE (FE)` <- round(balance_fe$`SE (FE)`,4)
balance_fe$`p-value (FE)` <- round(balance_fe$`p-value (FE)`,4)

balance_fe$Outcome <- c("Population Ages 0-29","Population Ages 30-49","Population Ages 50+","Primary Sector Workers",
                        "Industrial Workers","Commercial Sector Workers","Service Sector Workers","Income Vulnerable Population",
                        "MORENA Members (Baseline)")
names(balance_fe)[c(2:5)] <- c("ATE","SE","p-value","n")
balance_fe

#Table C1:
stargazer(as.matrix(balance_fe), rownames = F, 
          out="TableC1.tex")


##Equivalance tests:

##adapting code from Hartman and Hidalgo (2018)

###################
#### Functions ####
###################
equiv.t.test <- function(x, y, alpha = .1, epsilon = .2, std.err = "nominal", cluster.x = NULL, cluster.y = NULL) {
  x = x[!is.na(x)]
  y = y[!is.na(y)]
  
  
  dbar <- mean(x) - mean(y)
  m <- as.double(length(x))
  n <- as.double(length(y))
  N <- m+n
  x.var <-  var(x)
  y.var <- var(y)
  non.cent <- (m*n*epsilon^2)/N
  critical.const <- sqrt(qf(alpha,1,N-2,non.cent))
  
  se = sqrt((m-1)*x.var + (n-1)*y.var) / sqrt(m*n * (N-2)/N)
  df = N - 2
  
  t.stat <-  dbar / se
  p = pf(abs(t.stat)^2, 1, df , non.cent)
  obs_smd = (mean(x) - mean(y)) / sd(y)
  inverted <- try(uniroot(function(x) pf(abs(t.stat)^2, 1, N-2, ncp = (m*n*x^2)/N) - ifelse(pf(abs(t.stat)^2, 1, N-2, ncp = (m*n*0^2)/N) < alpha, pf(abs(t.stat)^2, 1, N-2, ncp = (m*n*obs_smd^2)/N), alpha), c(0,10*abs(t.stat)), tol = 0.0001)$root, silent = TRUE)
  if(class(inverted) == "try-error") {
    inverted = NA
  }
  rej = abs(t.stat) <= critical.const
  return(list(t.stat = t.stat, critical.const = critical.const, power = 2*pt(critical.const, N-2)-1, rej = rej, p = p, inverted = inverted))
}



# a helper function to run the equivalence tests on all balance covariates
run_equiv = function(X, Tr, epsilon.method = "std.effect", Y = NULL, 
                     custom.epsilon = NULL, std.err = "nominal", type = "equiv.t.test", 
                     fdr_correct = FALSE, cluster.x = NULL, cluster.y = NULL) {
  switch(epsilon.method,
         std.effect = {
           tol = abs(mean(Y[Tr == 1], na.rm = TRUE) - mean(Y[Tr == 0], na.rm = TRUE)) / sd(Y[Tr == 0], na.rm = TRUE)
         }, 
         custom = {
           if(is.null(custom.epsilon)) stop("ERROR: Must enter a custom epsilon value to use the 'custom' epsilon.method.")
           tol = custom.epsilon
         },
         strict = {
           tol = 0.36
         }, 
         liberal = {
           tol = 0.74
         }, 
         stop("ERROR: 'epsilon.method' not set to a valid option")
  )
  
  ranges = rep(tol, ncol(X))
  names(ranges) = names(X)
  print(ranges)
  
  # conduct equiv test for each variable
  tests = do.call("rbind", lapply(names(X), function(var) {
    print(var)
    
    res = equiv.t.test(X[Tr == 1, var], X[Tr == 0, var], epsilon = ranges[var], std.err = std.err, cluster.x = cluster.x, cluster.y = cluster.y)
    res$stat = res$t.stat
    res$obs_diff = (mean(X[Tr == 1, var], na.rm = TRUE) - mean(X[Tr == 0, var], na.rm = TRUE))
    res$obs_smd = ((mean(X[Tr == 1, var], na.rm = TRUE) - mean(X[Tr == 0, var], na.rm = TRUE))) / sd(X[Tr == 0, var], na.rm = TRUE)
    res$sd = sd(X[Tr == 0, var], na.rm = TRUE)
    
    return(c(res$inverted
             , round(res$p, 4)
             , res$obs_smd
             , res$obs_diff
             , res$power
             , res$sd))
    
  }))
  
  p.vals = unlist(tests[,2])
  names(p.vals) = names(ranges)
  
  power = unlist(tests[,5])
  names(power) = names(ranges)
  
  inverted = unlist(tests[,1])
  names(inverted) = names(ranges)
  
  sd = unlist(tests[,6])
  names(sd) = names(ranges)
  
  # return to scale of var
  inverted.scaled = unlist(lapply(names(inverted), function(var) {
    return(inverted[var] * sd[var])
  }))
  names(inverted.scaled) = names(ranges)
  
  observed.smd = unlist(tests[,3])
  names(observed.smd) = names(ranges)
  
  observed.diff = unlist(tests[,4])
  names(observed.diff) = names(ranges)
  
  # conduct BH FDR adjustment
  if(fdr_correct) {
    p.vals = p.adjust(p.vals, method = "BH")  
  }
  
  return(list(tol = ranges, inverted = inverted, inverted.scaled = inverted.scaled, p.vals = p.vals, observed.diff = observed.diff, observed.smd = observed.smd, power = power))
}

## a helper function to generate the plot
generate_plot = function(equiv_tests, panel.widths=c(1, 1, 5, 1, 1), display.names = NULL, var.rounding = 3, pval.rounding = 2, fdr_correct = FALSE) {
  .e <- environment()
  if(!is.null(display.names)) {
    equiv_tests$names = factor(display.names, levels = rev(display.names))
  } else {
    equiv_tests$names = factor(row.names(equiv_tests), levels = rev(row.names(equiv_tests)))
  }
  equiv_tests$const = rep(1, nrow(equiv_tests))
  equiv_tests = equiv_tests[nrow(equiv_tests):1, ]
  
  g = ggplot(equiv_tests, aes(x = names) )
  print(length(unique(equiv_tests$tol)))
  g = g + geom_linerange(aes(ymin = -1 * inverted, ymax = inverted), size = 5, color = "darkgray", alpha = 0.9)
  if(length(unique(equiv_tests$tol)) > 1){
    g = g + geom_linerange(aes(ymin = -1 * tol, ymax = tol), size = 10, color = 'gray')   
  } else {
    print("here")
    print(unique(equiv_tests$tol))
    lines = unique(equiv_tests$tol)
    g = g + geom_hline(yintercept = c(-1 * lines, lines), linetype = 2, size = 0.75)
  }
  g = g  + scale_shape_identity() + geom_point(aes(y = observed.smd), color = "black", shape = 18, size = 4)
  g = (g + coord_flip()
       + theme(
         axis.text.y = element_blank()
         , axis.ticks.y = element_blank()
         , axis.title.y = element_blank()
         , axis.text.x = element_text(size = 14)
         , axis.title.x = element_text(size = 14))
       + labs(x=NULL, y = paste("Equivalence Range (in standard deviations \u03C3)"), font=5) + theme_bw()
  )
  if(length(unique(equiv_tests$tol)) == 1) {
    g = g + ggtitle(paste0("Equivalence Tests  \n \n", "Equivalence Range: +/- ", round(unique(equiv_tests$tol), 2), "\u03C3 \n"))
  } else {
    g = g + ggtitle("Equivalence Tests  \n \n \n")
  }
  
  g_inv = ggplot(equiv_tests, aes(x = names, y = const, label = round(inverted.scaled, var.rounding)), environment = .e)
  g_inv = g_inv + geom_text()
  g_inv = (g_inv + theme_bw() + coord_flip()
           + theme(panel.grid.minor=element_blank()
                   , panel.grid.major=element_blank()
                   #, axis.line.x = element_blank()
                   , axis.text.x = element_text(color = "white", size = 14)
                   , axis.ticks.x = element_line(color = "white")
                   , axis.text.y = element_blank()
                   , axis.ticks.y = element_blank()
                   , axis.title.x = element_text(size = 14)
           )
           + ylim(1-.05, 1.05)
           + ggtitle("Equivalence\nConfidence\nInterval (+/-)\n(Scale of Var)")
           + labs(y = " ", x = NULL)
  )
  
  g_obs = ggplot(equiv_tests, aes(x = names, y = const, label = round(observed.diff, var.rounding)), environment = .e)
  g_obs = g_obs + geom_text()
  g_obs = (g_obs + theme_bw() + coord_flip()
           + theme(panel.grid.minor=element_blank()
                   , panel.grid.major=element_blank()
                   #, axis.line.x = element_blank()
                   , axis.text.x = element_text(color = "white", size = 14)
                   , axis.ticks.x = element_line(color = "white")
                   , axis.text.y = element_blank()
                   , axis.ticks.y = element_blank()
                   , axis.title.x = element_text(size = 14)
           )
           + ylim(1-.05, 1.05)
           + ggtitle("Observed\nMean\nDifference\n(Scale of Var)")
           + labs(y = " ", x = NULL)
  )
  
  g_pval = ggplot(equiv_tests, aes(x = names, y = const, label = round(p.vals, pval.rounding)), environment = .e)
  g_pval = g_pval + geom_text()
  g_pval = (g_pval + theme_bw() + coord_flip()
            + theme(panel.grid.minor=element_blank()
                    , panel.grid.major=element_blank()
                    #, axis.line.x = element_blank()
                    , axis.text.x = element_text(color = "white", size = 14)
                    , axis.ticks.x = element_line(color = "white")
                    , axis.text.y = element_blank()
                    , axis.ticks.y = element_blank()
                    , axis.title.x = element_text(size = 14)
            )
            + ylim(1-.05, 1.05)
            + ggtitle(ifelse(fdr_correct, "\nFDR\nCorrected\nP-value", "\n\n\nP-value"))
            + labs(y = " ", x = NULL)
  )
  
  g_var = ggplot(equiv_tests, aes(x = names, y = const, label = names))
  g_var = g_var + geom_text()
  g_var = (g_var + theme_bw() + coord_flip()
           + theme(panel.grid.minor=element_blank()
                   , panel.grid.major=element_blank()
                   #, axis.line.x = element_blank()
                   , axis.text.x = element_text(color = "white", size = 14)
                   , axis.ticks.x = element_line(color = "white")
                   , axis.text.y = element_blank()
                   , axis.ticks.y = element_blank()
                   , panel.border = element_blank()
                   , axis.title.x = element_text(size = 14)
           )
           + ylim(1-.05, 1.05)
           + ggtitle("\n \n \nVariable") + theme(plot.title = element_text(hjust = 0.5))
           + labs(y = " ", x = NULL)
  )
  
  grid.arrange(g_var, g_obs, g, g_inv, g_pval, ncol=5, nrow=1, widths= panel.widths, heights=c(4))
}


# #candidate characteristics:

replicate_strict = as.data.frame(run_equiv(X = subset(tombola, select = c("genero", "tiempo_de_residencia", "age")), 
                                           Tr = tombola$diputado_dummy, epsilon.method = "custom", custom.epsilon = 0.66,
                                           fdr_correct = TRUE))

#Figure A1:
CairoPDF(file = "FigA1.pdf", width = 12, height = 6)  
generate_plot(replicate_strict, panel.widths = c(2.5, 1.25, 5, 1.2, 1.2), 
              display.names = c("Female", "Years of Residence", "Age"), 
              pval.rounding = 3, fdr_correct = TRUE)
dev.off()



###Permutation inference (with controls for municipality size):

#subset valid_diputado_dummy to remove one observation with missing value on covariate:
valid_diputado_dummy_c <- subset(valid_diputado_dummy, !is.na(total_lista_nominal_2012))

# Declare randomization procedure
declaration <- declare_ra(N = length(valid_diputado_dummy_c$diputado_dummy), m = sum(valid_diputado_dummy_c$diputado_dummy==1))

#effect on new members:
##combined_padron_per_c:
Z <- valid_diputado_dummy_c$diputado_dummy
Y <- valid_diputado_dummy_c$combined_padron_per
C <- valid_diputado_dummy_c$total_lista_nominal_2012
dat <- data.frame(Y, Z, C)
set.seed(12102019)

# Conduct Randomization Inference
combined_padron_per_c <- conduct_ri(
  formula = Y ~ Z + C,
  declaration = declaration,
  sharp_hypothesis = 0,
  data = dat,
  sims = 50000,
  p = "two-tailed"
)
summary(combined_padron_per_c)


#effect on total members:
##combined_padron_total_per_c:
Z <- valid_diputado_dummy_c$diputado_dummy
Y <- valid_diputado_dummy_c$combined_padron_total_per
C <- valid_diputado_dummy_c$total_lista_nominal_2012
dat <- data.frame(Y, Z, C)
set.seed(12102019)

# Conduct Randomization Inference
combined_padron_total_per_c <- conduct_ri(
  formula = Y ~ Z + C,
  declaration = declaration,
  sharp_hypothesis = 0,
  data = dat,
  sims = 50000,
  p = "two-tailed"
)
summary(combined_padron_total_per_c)


##share_morena_deputies_c:
Z <- valid_diputado_dummy_c$diputado_dummy
Y <- valid_diputado_dummy_c$share_morena_deputies
C <- valid_diputado_dummy_c$total_lista_nominal_2012
dat <- data.frame(Y, Z, C)
set.seed(12102019)

# Conduct Randomization Inference
share_morena_deputies_c <- conduct_ri(
  formula = Y ~ Z + C,
  declaration = declaration,
  sharp_hypothesis = 0,
  data = dat,
  sims = 50000,
  p = "two-tailed"
)
summary(share_morena_deputies_c)


##share_pan_prd_deputies_c:
Z <- valid_diputado_dummy_c$diputado_dummy
Y <- valid_diputado_dummy_c$share_pan_prd_deputies
C <- valid_diputado_dummy_c$total_lista_nominal_2012
dat <- data.frame(Y, Z, C)
set.seed(12102019)

# Conduct Randomization Inference
share_pan_prd_deputies_c <- conduct_ri(
  formula = Y ~ Z + C,
  declaration = declaration,
  sharp_hypothesis = 0,
  data = dat,
  sims = 50000,
  p = "two-tailed"
)
summary(share_pan_prd_deputies_c)


##share_pri_deputies_c:
Z <- valid_diputado_dummy_c$diputado_dummy
Y <- valid_diputado_dummy_c$share_pri_deputies
C <- valid_diputado_dummy_c$total_lista_nominal_2012
dat <- data.frame(Y, Z, C)
set.seed(12102019)

# Conduct Randomization Inference
share_pri_deputies_c <- conduct_ri(
  formula = Y ~ Z + C,
  declaration = declaration,
  sharp_hypothesis = 0,
  data = dat,
  sims = 50000,
  p = "two-tailed"
)
summary(share_pri_deputies_c)


##MoV_MORENA_deputies_c:
Z <- valid_diputado_dummy_c$diputado_dummy
Y <- valid_diputado_dummy_c$MoV_MORENA_deputies
C <- valid_diputado_dummy_c$total_lista_nominal_2012
dat <- data.frame(Y, Z, C)
set.seed(12102019)

# Conduct Randomization Inference
MoV_MORENA_deputies_c <- conduct_ri(
  formula = Y ~ Z + C,
  declaration = declaration,
  sharp_hypothesis = 0,
  data = dat,
  sims = 50000,
  p = "two-tailed"
)
summary(MoV_MORENA_deputies_c)


##MoV_MORENA_PRD_deputies_c:
Z <- valid_diputado_dummy_c$diputado_dummy
Y <- valid_diputado_dummy_c$MoV_MORENA_PRD_deputies
C <- valid_diputado_dummy_c$total_lista_nominal_2012
dat <- data.frame(Y, Z, C)
set.seed(12102019)

# Conduct Randomization Inference
MoV_MORENA_PRD_deputies_c <- conduct_ri(
  formula = Y ~ Z + C,
  declaration = declaration,
  sharp_hypothesis = 0,
  data = dat,
  sims = 50000,
  p = "two-tailed"
)
summary(MoV_MORENA_PRD_deputies_c)


##combining results (RI):
findings_RI_c <- as.data.frame(rbind(rep(NA,3),rep(NA,3),rep(NA,3),rep(NA,3)))

findings_RI_c[1,] <- c("combined_padron_per_c",summary(combined_padron_per_c))[c(1,3:4)]
findings_RI_c[2,] <- c("combined_padron_total_per_c",summary(combined_padron_total_per_c))[c(1,3:4)]
findings_RI_c[3,] <- c("share_morena_deputies_c",summary(share_morena_deputies_c))[c(1,3:4)]
findings_RI_c[4,] <- c("share_pan_prd_deputies_c",summary(share_pan_prd_deputies_c))[c(1,3:4)]
findings_RI_c[5,] <- c("share_pri_deputies_c",summary(share_pri_deputies_c))[c(1,3:4)]
findings_RI_c[6,] <- c("MoV_MORENA_deputies_c",summary(MoV_MORENA_deputies_c))[c(1,3:4)]
findings_RI_c[7,] <- c("MoV_MORENA_PRD_deputies_c",summary(MoV_MORENA_PRD_deputies_c))[c(1,3:4)]

colnames(findings_RI_c) <- c("Outcome","RI ATE", "RI p-value")

findings_RI_c


##table with replication of main findings: randomization inference:
RI_table <- findings_RI_c

RI_table$`RI ATE` <- round(RI_table$`RI ATE`,4)
RI_table$`RI p-value` <- round(RI_table$`RI p-value`,4)

RI_table$Outcome <- c("New Members","Total Members","Vote Share for MORENA","Vote Share for PRD","Vote Share for PRI","MoV: MORENA over Runner-up","MoV: MORENA over PRD/PAN")
names(RI_table)[c(2:3)] <- c("ATE","p-value")

#Table F1:
stargazer(as.matrix(RI_table)[c(1:2),], rownames = F,
          type="latex", out = "TableF1.tex")

#Table F2:
stargazer(as.matrix(RI_table)[c(3:7),], rownames = F,
          type="latex", out = "TableF2.tex")

